# Put the columns of `police` on the search path so they can be referred to
# by bare name.
attach(police)
# Size of the data set, reported as (rows, columns).
c(nrow(police), ncol(police))
## [1] 2384 47
# Preview the first six records of the data.
head(police, n = 6L)
## INCIDENT_DATE INCIDENT_TIME UOF_NUMBER OFFICER_ID OFFICER_GENDER
## 1 OCCURRED_D OCCURRED_T UOFNum CURRENT_BA OffSex
## 2 09-03-2016 04:14:00 AM 37702 10810 Male
## 3 3/22/16 11:00:00 PM 33413 7706 Male
## 4 5/22/16 01:29:00 PM 34567 11014 Male
## 5 01-10-2016 08:55:00 PM 31460 6692 Male
## 6 11-08-2016 02:30:00 AM 37879, 37898 9844 Male
## OFFICER_RACE OFFICER_HIRE_DATE OFFICER_YEARS_ON_FORCE OFFICER_INJURY
## 1 OffRace HIRE_DT INCIDENT_DATE_LESS_ OFF_INJURE
## 2 Black 05-07-2014 2 No
## 3 White 01-08-1999 17 Yes
## 4 Black 5/20/15 1 No
## 5 Black 7/29/91 24 No
## 6 White 10-04-2009 7 No
## OFFICER_INJURY_TYPE OFFICER_HOSPITALIZATION SUBJECT_ID SUBJECT_RACE
## 1 OFF_INJURE_DESC OFF_HOSPIT CitNum CitRace
## 2 No injuries noted or visible No 46424 Black
## 3 Sprain/Strain Yes 44324 Hispanic
## 4 No injuries noted or visible No 45126 Hispanic
## 5 No injuries noted or visible No 43150 Hispanic
## 6 No injuries noted or visible No 47307 Black
## SUBJECT_GENDER SUBJECT_INJURY SUBJECT_INJURY_TYPE
## 1 CitSex CIT_INJURE SUBJ_INJURE_DESC
## 2 Female Yes Non-Visible Injury/Pain
## 3 Male No No injuries noted or visible
## 4 Male No No injuries noted or visible
## 5 Male Yes Laceration/Cut
## 6 Male No No injuries noted or visible
## SUBJECT_WAS_ARRESTED SUBJECT_DESCRIPTION SUBJECT_OFFENSE
## 1 CIT_ARREST CIT_INFL_A CitChargeT
## 2 Yes Mentally unstable APOWW
## 3 Yes Mentally unstable APOWW
## 4 Yes Unknown APOWW
## 5 Yes FD-Unknown if Armed Evading Arrest
## 6 Yes Unknown Other Misdemeanor Arrest
## REPORTING_AREA BEAT SECTOR DIVISION LOCATION_DISTRICT STREET_NUMBER
## 1 RA BEAT SECTOR DIVISION DIST_NAME STREET_N
## 2 2062 134 130 CENTRAL D14 211
## 3 1197 237 230 NORTHEAST D9 7647
## 4 4153 432 430 SOUTHWEST D6 716
## 5 4523 641 640 NORTH CENTRAL D11 5600
## 6 2167 346 340 SOUTHEAST D7 4600
## STREET_NAME STREET_DIRECTION STREET_TYPE
## 1 STREET street_g street_t
## 2 Ervay N St.
## 3 Ferguson NULL Rd.
## 4 bimebella dr NULL Ln.
## 5 LBJ NULL Frwy.
## 6 Malcolm X S Blvd.
## LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION LOCATION_CITY LOCATION_STATE
## 1 Street Address City State
## 2 211 N ERVAY ST Dallas TX
## 3 7647 FERGUSON RD Dallas TX
## 4 716 BIMEBELLA LN Dallas TX
## 5 5600 L B J FWY Dallas TX
## 6 4600 S MALCOLM X BLVD Dallas TX
## LOCATION_LATITUDE LOCATION_LONGITUDE INCIDENT_REASON REASON_FOR_FORCE
## 1 Latitude Longitude SERVICE_TY UOF_REASON
## 2 32.782205 -96.797461 Arrest Arrest
## 3 32.798978 -96.717493 Arrest Arrest
## 4 32.73971 -96.92519 Arrest Arrest
## 5 Arrest Arrest
## 6 Arrest Arrest
## TYPE_OF_FORCE_USED1 TYPE_OF_FORCE_USED2 TYPE_OF_FORCE_USED3
## 1 ForceType1 ForceType2 ForceType3
## 2 Hand/Arm/Elbow Strike
## 3 Joint Locks
## 4 Take Down - Group
## 5 K-9 Deployment
## 6 Verbal Command Take Down - Arm
## TYPE_OF_FORCE_USED4 TYPE_OF_FORCE_USED5 TYPE_OF_FORCE_USED6
## 1 ForceType4 ForceType5 ForceType6
## 2
## 3
## 4
## 5
## 6
## TYPE_OF_FORCE_USED7 TYPE_OF_FORCE_USED8 TYPE_OF_FORCE_USED9
## 1 ForceType7 ForceType8 ForceType9
## 2
## 3
## 4
## 5
## 6
## TYPE_OF_FORCE_USED10 NUMBER_EC_CYCLES FORCE_EFFECTIVE
## 1 ForceType10 Cycles_Num ForceEffec
## 2 NULL Yes
## 3 NULL Yes
## 4 NULL Yes
## 5 NULL Yes
## 6 NULL No, Yes
# Start exploring the variables: print every column name of `police`
# preceded by its position.
# seq_along() is used rather than 1:length() so that the loop body is skipped
# when there are no columns (1:0 would iterate over 1 and then 0).
for (i in seq_along(names(police))) {
  # "\b" is a backspace character placed before ")" after the index.
  cat(i, "\b)", names(police)[i], ", ")
}
## 1 ) INCIDENT_DATE , 2 ) INCIDENT_TIME , 3 ) UOF_NUMBER , 4 ) OFFICER_ID , 5 ) OFFICER_GENDER , 6 ) OFFICER_RACE , 7 ) OFFICER_HIRE_DATE , 8 ) OFFICER_YEARS_ON_FORCE , 9 ) OFFICER_INJURY , 10 ) OFFICER_INJURY_TYPE , 11 ) OFFICER_HOSPITALIZATION , 12 ) SUBJECT_ID , 13 ) SUBJECT_RACE , 14 ) SUBJECT_GENDER , 15 ) SUBJECT_INJURY , 16 ) SUBJECT_INJURY_TYPE , 17 ) SUBJECT_WAS_ARRESTED , 18 ) SUBJECT_DESCRIPTION , 19 ) SUBJECT_OFFENSE , 20 ) REPORTING_AREA , 21 ) BEAT , 22 ) SECTOR , 23 ) DIVISION , 24 ) LOCATION_DISTRICT , 25 ) STREET_NUMBER , 26 ) STREET_NAME , 27 ) STREET_DIRECTION , 28 ) STREET_TYPE , 29 ) LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION , 30 ) LOCATION_CITY , 31 ) LOCATION_STATE , 32 ) LOCATION_LATITUDE , 33 ) LOCATION_LONGITUDE , 34 ) INCIDENT_REASON , 35 ) REASON_FOR_FORCE , 36 ) TYPE_OF_FORCE_USED1 , 37 ) TYPE_OF_FORCE_USED2 , 38 ) TYPE_OF_FORCE_USED3 , 39 ) TYPE_OF_FORCE_USED4 , 40 ) TYPE_OF_FORCE_USED5 , 41 ) TYPE_OF_FORCE_USED6 , 42 ) TYPE_OF_FORCE_USED7 , 43 ) TYPE_OF_FORCE_USED8 , 44 ) TYPE_OF_FORCE_USED9 , 45 ) TYPE_OF_FORCE_USED10 , 46 ) NUMBER_EC_CYCLES , 47 ) FORCE_EFFECTIVE ,
# Print the class of every column of `police`.
# The column itself (police[[i]]) is inspected. Taking
# class(colnames(police)[i]) instead looks at the column's *name*, which is
# always a character string, so it reports "character" for every column
# regardless of the data.
# NOTE: the recorded output directly below this loop was produced by that
# name-based version and therefore reads "character" throughout.
for (i in seq_along(police)) {
  # A column may carry more than one class; join them with "/".
  cat(paste(class(police[[i]]), collapse = "/"), "\b, ")
}
## character , character , character , character , character , character , character , character , character , character , character , character , character , character , character , character , character , character , character , character , character , character , character , character , character , character , character , character , character , character , character , character , character , character , character , character , character , character , character , character , character , character , character , character , character , character , character ,
{r message=FALSE,warning=FALSE,echo=FALSE, eval=TRUE} # library(lubridate) # #converting all the mismarked variable types to this original datatype # police$INCIDENT_DATE <- as.Date(paste(police$INCIDENT_DATE)) # police$INCIDENT_TIME <- as.Date(paste(police$OFFICER_HIRE_DATE)) # police$OFFICER_YEARS_ON_FORCE <- as.numeric(paste(police$OFFICER_YEARS_ON_FORCE)) # police$INCIDENT_TIME <- as_datetime(paste(police$INCIDENT_TIME)) # ##have a look at the summary of the data
# print(summary(police))
# Load summarytools (presumably the source of dfSummary() and view() below).
library(summarytools)
# Build a summary of the whole `police` data frame with dfSummary() and pass
# it to view() for display. Note this is lower-case view(), not utils::View().
view(dfSummary(police))
# Analyse the levels and frequencies of the categorical demographic variables.
# Each table is built with with(police, ...) so the column is looked up in the
# current `police` data frame rather than in a copy left on the search path by
# attach(); the printed header (the bare column name) stays the same.
print(".........................................................................")
## [1] "........................................................................."
with(police, table(OFFICER_GENDER))
## OFFICER_GENDER
## Female Male OffSex
## 240 2143 1
print(".........................................................................")
## [1] "........................................................................."
with(police, table(OFFICER_RACE))
## OFFICER_RACE
## American Ind Asian Black Hispanic OffRace Other
## 8 55 341 482 1 27
## White
## 1470
print(".........................................................................")
## [1] "........................................................................."
with(police, table(SUBJECT_GENDER))
## SUBJECT_GENDER
## CitSex Female Male NULL Unknown
## 1 440 1932 10 1
print("..........................................................................")
## [1] ".........................................................................."
with(police, table(SUBJECT_RACE))
## SUBJECT_RACE
## American Ind Asian Black CitRace Hispanic NULL
## 1 5 1333 1 524 39
## Other White
## 11 470
# Eliminate unwanted levels from the demographic factors.
# droplevels(exclude = ...) rebuilds the factor without the listed levels, so
# every value that had one of those levels becomes NA.
# Each column is read from `police` itself rather than by bare name, so the
# result does not depend on a copy placed on the search path by attach().
police$OFFICER_GENDER <- droplevels(police$OFFICER_GENDER, exclude = "OffSex")
police$SUBJECT_GENDER <- droplevels(
  police$SUBJECT_GENDER,
  exclude = c("CitSex", "NULL", "Unknown")
)
police$OFFICER_RACE <- droplevels(
  police$OFFICER_RACE,
  exclude = c("OffRace", "Other")
)
police$SUBJECT_RACE <- droplevels(
  police$SUBJECT_RACE,
  exclude = c("CitRace", "Other", "NULL")
)
sum(is.na(police)) # count the NA cells in the whole data frame
## [1] 92
police <- na.omit(police) # drop every row that contains at least one NA
Data Visualization has a huge impact on human perpective.I remember a quote “A picture is worth a thousand words”, we all accept it because when looking at a list of data and extracting information from it is very difficult and time consuming if we are doing that manually, here comes the importance for data visualization. Data visualization bounces us a strong awareness of what the data means by giving it visual framework using maps or graphs. This makes the data more natural for the human mind to realize and therefore makes it informal to recognize trends, patterns, and outliers within huge data sets. Ggplot is the most widely used package for data visualization.
And most of the cases they have used verbal command and weapon display in order to comply.
### This plot displays the incidents happened across various beats,streets and sector